from gapminder import gapminder
import pandas as pd
import plotly.express as px
from plotly.offline import init_notebook_mode
# Notebook setup: switch on plotly's notebook rendering, make pandas print
# floats with two decimals, then preview the first rows of the dataset.
init_notebook_mode(connected=True)
pd.set_option("display.float_format", "{:.2f}".format)
gapminder.head()
| | country | continent | year | lifeExp | pop | gdpPercap |
|---|---|---|---|---|---|---|
| 0 | Afghanistan | Asia | 1952 | 28.80 | 8425333 | 779.45 |
| 1 | Afghanistan | Asia | 1957 | 30.33 | 9240934 | 820.85 |
| 2 | Afghanistan | Asia | 1962 | 32.00 | 10267083 | 853.10 |
| 3 | Afghanistan | Asia | 1967 | 34.02 | 11537966 | 836.20 |
| 4 | Afghanistan | Asia | 1972 | 36.09 | 13079460 | 739.98 |
# Print a summary of the frame: column names, non-null counts and dtypes.
gapminder.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1704 entries, 0 to 1703 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 1704 non-null object 1 continent 1704 non-null object 2 year 1704 non-null int64 3 lifeExp 1704 non-null float64 4 pop 1704 non-null int64 5 gdpPercap 1704 non-null float64 dtypes: float64(2), int64(2), object(2) memory usage: 80.0+ KB
# Summary statistics of the numeric columns, transposed so that each
# column of the dataset becomes one row of the summary.
gapminder.describe().transpose()
| | count | mean | std | min | 25% | 50% | 75% | max |
|---|---|---|---|---|---|---|---|---|
| year | 1704.00 | 1979.50 | 17.27 | 1952.00 | 1965.75 | 1979.50 | 1993.25 | 2007.00 |
| lifeExp | 1704.00 | 59.47 | 12.92 | 23.60 | 48.20 | 60.71 | 70.85 | 82.60 |
| pop | 1704.00 | 29601212.32 | 106157896.74 | 60011.00 | 2793664.00 | 7023595.50 | 19585221.75 | 1318683096.00 |
| gdpPercap | 1704.00 | 7215.33 | 9857.45 | 241.17 | 1202.06 | 3531.85 | 9325.46 | 113523.13 |
# Report the first and last year present in the "year" column.
print(f"We have data from {gapminder['year'].min()} to {gapminder['year'].max()}.")
We have data from 1952 to 2007.
We have life expectancy, population and GDP per capita data from 1952 to 2007. Let's filter the data to the year 2007 and explore it.
# Keep only the rows for the year 2007; the snapshot plots below use them.
gapminder_2007 = gapminder.loc[gapminder["year"] == 2007]

# Scatter of life expectancy against GDP per capita, one colour per continent.
fig = px.scatter(
    gapminder_2007,
    x="gdpPercap",
    y="lifeExp",
    color="continent",
).update_layout(
    title="2007 data: Life Expectancy vs GDP Per Capita",
    xaxis_title="GDP Per Capita",
    yaxis_title="Life Expectancy",
)
fig.show(renderer="notebook")
# Same scatter as a bubble chart: marker size follows the "pop" column and
# the hover label shows the country name.
fig = px.scatter(
    gapminder_2007,
    x="gdpPercap",
    y="lifeExp",
    color="continent",
    size="pop",
    hover_name="country",
).update_layout(
    title="2007 data: Life Expectancy vs GDP Per Capita",
    xaxis_title="GDP Per Capita",
    yaxis_title="Life Expectancy",
)
fig.show(renderer="notebook")
# Life expectancy against population; the x axis is logarithmic (log_x=True).
fig = px.scatter(
    gapminder_2007,
    x="pop",
    y="lifeExp",
    color="continent",
    log_x=True,
    hover_name="country",
).update_layout(
    title="2007 data: Life Expectancy vs Population",
    xaxis_title="Population",
    yaxis_title="Life Expectancy",
)
fig.show(renderer="notebook")
The GDP-per-capita scatter plots above show that countries in Oceania, Europe and the Americas tend to have higher life expectancy and GDP per capita than countries in Asia and Africa.
Now, let's explore how life expectancy varies between continents. We will use the same data filtered to the year 2007.
# Average 2007 life expectancy per continent; as_index=False keeps
# "continent" as a regular column so the result can be plotted directly.
mean_life_exp = (
    gapminder_2007
    .groupby("continent", as_index=False)["lifeExp"]
    .agg("mean")
)
mean_life_exp
| | continent | lifeExp |
|---|---|---|
| 0 | Africa | 54.81 |
| 1 | Americas | 73.61 |
| 2 | Asia | 70.73 |
| 3 | Europe | 77.65 |
| 4 | Oceania | 80.72 |
# Bar chart of the per-continent means, with the categories on the x axis
# ordered from the largest total to the smallest.
fig = px.bar(
    mean_life_exp,
    x="continent",
    y="lifeExp",
    color="continent",
).update_layout(
    title="2007 data: Mean Life Expectancy of continents",
    xaxis_title="Continent",
    yaxis_title="Mean Life Expectancy",
    xaxis=dict(categoryorder="total descending"),
)
fig.show(renderer="notebook")
Oceania has the highest mean life expectancy, followed by Europe, the Americas and Asia. Africa has the lowest mean life expectancy of all.
# Countries with the worst life expectancy in each continent (2007).
# idxmin() yields index *labels* of gapminder_2007, so the rows are fetched
# with the label-based .loc on that same frame. The previous positional
# .iloc lookup only returned the right rows while the data kept its default
# 0..n-1 RangeIndex.
min_life_exp_ind = gapminder_2007.groupby("continent")["lifeExp"].idxmin().to_list()
gapminder_2007.loc[min_life_exp_ind, :].reset_index(drop=True)
| | country | continent | year | lifeExp | pop | gdpPercap |
|---|---|---|---|---|---|---|
| 0 | Swaziland | Africa | 2007 | 39.61 | 1133066 | 4513.48 |
| 1 | Haiti | Americas | 2007 | 60.92 | 8502814 | 1201.64 |
| 2 | Afghanistan | Asia | 2007 | 43.83 | 31889923 | 974.58 |
| 3 | Turkey | Europe | 2007 | 71.78 | 71158647 | 8458.28 |
| 4 | New Zealand | Oceania | 2007 | 80.20 | 4115771 | 25185.01 |
# Countries with the best life expectancy in each continent (2007).
# idxmax() yields index *labels* of gapminder_2007, so the rows are fetched
# with the label-based .loc on that same frame. The previous positional
# .iloc lookup only returned the right rows while the data kept its default
# 0..n-1 RangeIndex.
max_life_exp_ind = gapminder_2007.groupby("continent")["lifeExp"].idxmax().to_list()
gapminder_2007.loc[max_life_exp_ind, :].reset_index(drop=True)
| | country | continent | year | lifeExp | pop | gdpPercap |
|---|---|---|---|---|---|---|
| 0 | Reunion | Africa | 2007 | 76.44 | 798094 | 7670.12 |
| 1 | Canada | Americas | 2007 | 80.65 | 33390141 | 36319.24 |
| 2 | Japan | Asia | 2007 | 82.60 | 127467972 | 31656.07 |
| 3 | Iceland | Europe | 2007 | 81.76 | 301931 | 36180.79 |
| 4 | Australia | Oceania | 2007 | 81.23 | 20434176 | 34435.37 |
# Distribution of life expectancy: one box per continent for the 2007 data.
fig = px.box(
    gapminder_2007,
    x="continent",
    y="lifeExp",
    color="continent",
).update_layout(
    title="2007 data: Life Expectancy of continents",
    xaxis_title="Continent",
    yaxis_title="Life Expectancy",
)
fig.show(renderer="notebook")
Life expectancy varies more in Africa than in any other continent. Its median life expectancy is also the lowest of all continents.
# Distribution of GDP per capita: one box per continent for the 2007 data.
fig = px.box(
    gapminder_2007,
    x="continent",
    y="gdpPercap",
    color="continent",
).update_layout(
    title="2007 data: GDP Per Capita of continents",
    xaxis_title="Continent",
    yaxis_title="GDP Per Capita",
)
fig.show(renderer="notebook")
GDP per capita varies more among Asian and European countries than among countries in other continents.
Let's look at life expectancy, population and GDP per capita data of India.
# All rows (every available year) whose country is India.
gapminder_india = gapminder.loc[gapminder["country"] == "India"]
gapminder_india
| | country | continent | year | lifeExp | pop | gdpPercap |
|---|---|---|---|---|---|---|
| 696 | India | Asia | 1952 | 37.37 | 372000000 | 546.57 |
| 697 | India | Asia | 1957 | 40.25 | 409000000 | 590.06 |
| 698 | India | Asia | 1962 | 43.60 | 454000000 | 658.35 |
| 699 | India | Asia | 1967 | 47.19 | 506000000 | 700.77 |
| 700 | India | Asia | 1972 | 50.65 | 567000000 | 724.03 |
| 701 | India | Asia | 1977 | 54.21 | 634000000 | 813.34 |
| 702 | India | Asia | 1982 | 56.60 | 708000000 | 855.72 |
| 703 | India | Asia | 1987 | 58.55 | 788000000 | 976.51 |
| 704 | India | Asia | 1992 | 60.22 | 872000000 | 1164.41 |
| 705 | India | Asia | 1997 | 61.77 | 959000000 | 1458.82 |
| 706 | India | Asia | 2002 | 62.88 | 1034172547 | 1746.77 |
| 707 | India | Asia | 2007 | 64.70 | 1110396331 | 2452.21 |
# India's life expectancy over time, with a marker at every data point.
fig = px.line(
    gapminder_india,
    x="year",
    y="lifeExp",
    markers=True,
).update_layout(
    title="Life Expectancy of India",
    xaxis_title="Year",
    yaxis_title="Life Expectancy",
)
fig.show(renderer="notebook")
# India's population over time, with a marker at every data point.
fig = px.line(
    gapminder_india,
    x="year",
    y="pop",
    markers=True,
).update_layout(
    title="Population of India",
    xaxis_title="Year",
    yaxis_title="Population",
)
fig.show(renderer="notebook")
# India's GDP per capita over time, with a marker at every data point.
fig = px.line(
    gapminder_india,
    x="year",
    y="gdpPercap",
    markers=True,
).update_layout(
    title="GDP Per Capita of India",
    xaxis_title="Year",
    yaxis_title="GDP Per Capita",
)
fig.show(renderer="notebook")
Between 1952 and 2007, the life expectancy of India increased from 37.373 years to 64.698 years.
The population of India increased dramatically, from 372 million to 1.11 billion.
GDP per capita increased from 546.57 to 2452.21.
# Pairwise Pearson correlation (pandas' default method) between the three
# numeric indicators, computed over every row of the dataset.
corr = gapminder.loc[:, ["lifeExp", "pop", "gdpPercap"]].corr(method="pearson")
corr
| | lifeExp | pop | gdpPercap |
|---|---|---|---|
| lifeExp | 1.00 | 0.06 | 0.58 |
| pop | 0.06 | 1.00 | -0.03 |
| gdpPercap | 0.58 | -0.03 | 1.00 |
# Heatmap of the correlation matrix: colour scale fixed to [-1, 1] and each
# cell annotated with its value formatted to two decimals.
fig = px.imshow(
    img=corr,
    zmin=-1,
    zmax=1,
    text_auto=".2f",
)
fig.show(renderer="notebook")
GDP per capita and life expectancy have a moderately strong positive correlation (0.58).
# Compare GDP per capita over time for a hand-picked set of countries.
country_list = [
    "United States",
    "United Kingdom",
    "Canada",
    "France",
    "Spain",
    "Australia",
    "Japan",
]
select_country = gapminder.loc[gapminder["country"].isin(country_list)]

# One line per country, coloured from the ColorBrewer "Set2" palette.
fig = px.line(
    select_country,
    x="year",
    y="gdpPercap",
    color="country",
    color_discrete_sequence=px.colors.colorbrewer.Set2,
).update_layout(
    title="GDP Per Capita",
    xaxis_title="Year",
    yaxis_title="GDP Per Capita",
)
fig.show(renderer="notebook")
Among the selected countries, the United States has the highest GDP per capita from 1952 to 2007, followed by Canada. GDP per capita follows a similar trend across the European countries.